**************************************************************************************************************************************************
******************* Code for 'Are Investment Tax Breaks Effective: Australian Evidence' *******************
***************************************************** Code for RDD data construction ******************************************************************

**************************************************DATE: Jun 2024 ******************************************************************************
*********************************************************************************************************************************************
*** Code structure
*00. Preliminaries and globals
*01. Make BIT income variables
*02. CAPEX data initial cleaning
*03. Bring in indicator if company in the group




************* 00. Preliminaries **********
* Start from an empty session.
clear all

* Ado-file search directories (paths left blank in this copy), followed by a
* rebuild of the Mata library index.
sysdir set PLUS ""
sysdir set PERSONAL ""
mata: mata mlib index

* Directory globals used to build file paths below (left blank in this copy).
global data ""
global savefile ""

* Identifier variable name for group-level work.
global ids id_bg // use enterprise group merging

************* 01. Bring together BIT and BAS data **********
* The BAS frame is the master file. BIT income (current plus the two variables
* income_l and income_l2) is attached by id and tsid. BAS rows without a BIT
* match are kept; BIT rows without a BAS match are not brought in.
use "$data\bas_frame_20.dta", clear
merge m:1 id tsid using "$data\bit_frame_21.dta", ///
    keepusing(income income_l income_l2)          ///
    keep(master match)
drop _merge

* Shrink storage types where possible.
compress

************** 02. Clean data **********

* Rows with an empty id cannot carry any BIT or frame data, so they are removed.
drop if id == ""

** Some firmographics, all derived from latest_sisca08
generate gov          = (latest_sisca08 == 3000)  // might want to drop these
generate non_fin_prof = inlist(latest_sisca08, 4000, 1001, 1009)
generate non_prof     = (latest_sisca08 == 5000)

* Remove public firms
drop if gov

** Use the panel ANZSIC measure (not the cross-sectional one)
generate industry = latest_anzsic06

* Division letter from the industry code. Division A is everything below 600;
* each remaining entry is "letter lower upper" and assigns the letter when
* lower < industry < upper (both bounds strict). Codes that fall between the
* listed ranges, and missing codes, leave division empty.
generate division = "A" if industry < 600
foreach spec in "B 599 1100"  "C 1099 2600" "D 2599 3000" "E 2999 3300" ///
                "F 3299 3900" "G 3899 4400" "H 4399 4600" "I 4599 5400" ///
                "J 5399 6200" "K 6199 6500" "L 6599 6800" "M 6799 7100" ///
                "N 7199 7400" "O 7499 8000" "P 7999 8300" "Q 8399 8800" ///
                "R 8899 9300" "S 9399 9999" {
    tokenize `spec'
    replace division = "`1'" if industry > `2' & industry < `3'
}

* Mining indicator: division B
generate mining = (division == "B")

* tsid becomes the year variable; date numbers each year-quarter combination
rename tsid year
egen date = group(year quarter)

* Save the EUM version of the dataset
save "$data\bas_bit_frame_adjs_20.dta", replace



************** 03. EG level version of dataset **********
* Rebuilds the data at the group level: rows sharing a bg_id are collapsed into
* one row per tsid-quarter, and rows with an empty bg_id keep their own id as
* the unit (see id_bg below).
use "$data\bas_frame_20.dta" , clear

drop if id == "" // dropping as won't have any bit or frame data

compress
** Checking bg_id in case it changes over time
bysort id bg_id: g first_bg_fr = (_n==1 & bg_id!="") // one flagged row per distinct non-empty bg_id within id
bysort id : egen bg_counts_fr = total(first_bg_fr)   // number of distinct non-empty bg_id values per id
tab bg_counts_fr // any change bg?
g mult_bg = (bg_counts_fr>1 & bg_counts_fr!=.) // flag for these changing ones.

g id_bg = bg_id // make joint identifier
replace id_bg = id if bg_id==""
* NOTE(review): this ordering (largest turnover first within tsid-quarter-bg_id)
* is presumably meant to steer the (firstnm) picks in the collapse below, but the
* merge that follows may re-sort the data by its key variables -- confirm the
* order survives, or move the sort to just before the collapse.
gsort tsid quarter bg_id -turnover
egen firmid = group(id)

* Bring in lagged income at firm level
merge m:1 id tsid using "$data\bit_frame_21.dta", keep(master match) keepusing(income income_l income_l2)
drop _merge

** collapse to bg level
* Every variable not named here is summed in the collapse, including the helper
* flags created above (first_bg_fr, bg_counts_fr, mult_bg).
ds tsid id quarter bg_id id_bg latest_anzsic06 latest_sisca08 firmid, not
* Capture the list immediately: r() results only live until the next command
* that stores results, so do not rely on them surviving the compress below.
local sumvars `r(varlist)'
compress
* NOTE(review): collapse (sum) yields 0, not missing, when every contributing
* value is missing, so a group with no matched BIT income gets income == 0 here
* unless a later step overwrites it -- confirm this is intended.
collapse (sum) `sumvars' (firstnm) latest_anzsic06 latest_sisca08 (count) sub_firms = firmid, by(tsid quarter id_bg)

** Bring in aggregated bg level income
* No keepusing() is given, so every variable in the using file is added. The
* *_eg variables used below are presumably supplied by this file -- verify.
merge m:1 id_bg tsid  using "$data\bit_frame_bg_21.dta", keep(master match)
drop _merge
** eg level income if eg, otherwise just basic summed
replace income = income_eg if income_eg!=.
replace income_l = income_l_eg if income_l_eg!=.
replace income_l2 = income_l2_eg if income_l2_eg!=.

* Modal SISCA code per id_bg (minmode breaks ties towards the smallest code),
* computed separately from the firm-based and the eg-based variable.
bysort id_bg: egen mode_sis = mode(latest_sisca08), minmode
bysort id_bg: egen mode_sis_eg = mode(latest_sisca08_eg), minmode
g sisca = mode_sis_eg // variable that is most freq sisca for eg, and if not eg most frequent id based
replace sisca=mode_sis if sisca==.


* All three indicators are built from the same modal sisca. The 1001 and 1009
* checks previously used the post-collapse latest_sisca08 (the first non-missing
* value in the cell), which could disagree with sisca and vary within an id_bg.
g gov =(sisca==3000)  // might want to drop these
g non_fin_prof = (sisca==4000 | sisca==1001 | sisca==1009)
g non_prof = (sisca==5000)

drop if gov == 1

** Checking if industry ends up changing due to collapse - modal
bysort id_bg: egen mod_ind = mode(latest_anzsic06), minmode
bysort id_bg: egen mode_ind_eg = mode(latest_anzsic06_eg), minmode

** Choice here: use the ANZSIC at eg level first, then the ANZSIC at id level; in all cases taking the modal outcome

g industry = mode_ind_eg
replace industry = mod_ind if industry == .
count if industry == . // none missing by end


*Generate indicator for divisions
* Bounds are strict on both sides; codes falling between the listed ranges, and
* missing codes, leave division empty.
g division = "A" if industry < 600
replace division = "B" if industry > 599 & industry < 1100
replace division = "C" if industry > 1099 & industry < 2600
replace division = "D" if industry > 2599 & industry < 3000
replace division = "E" if industry > 2999 & industry < 3300
replace division = "F" if industry > 3299 & industry < 3900
replace division = "G" if industry > 3899 & industry < 4400
replace division = "H" if industry > 4399 & industry < 4600
replace division = "I" if industry > 4599 & industry < 5400
replace division = "J" if industry > 5399 & industry < 6200
replace division = "K" if industry > 6199 & industry < 6500
replace division = "L" if industry > 6599 & industry < 6800
replace division = "M" if industry > 6799 & industry < 7100
replace division = "N" if industry > 7199 & industry < 7400
replace division = "O" if industry > 7499 & industry < 8000
replace division = "P" if industry > 7999 & industry < 8300
replace division = "Q" if industry > 8399 & industry < 8800
replace division = "R" if industry > 8899 & industry < 9300
replace division = "S" if industry > 9399 & industry < 9999

* Mining indicator: division B
gen mining=(division == "B")
* tsid becomes the year variable; date numbers each year-quarter combination
rename tsid year
egen date=group(year quarter)
save "$data\bas_bit_bas_frame_adjs_bg_20.dta", replace

